rm(list = ls())

library("mlogit")
library("Matrix")
library("dplyr")


setwd("C:/Users/at449/Desktop/replication package")

dt<-read.csv("RawPSDData.csv")


source("Functions_supply_PSD_2018.R")

#########################################################################################

#########################################################################################
########     Setthe raw Data in Logit format
#########################################################################################

#########################################################################################


dt$choicevar<- paste0(dt$borrowerID,"-", dt$Lenders,"-",dt$initial_gross_interest_rate,"-",dt$Fee,"-",dt$Max_LTV,"-",dt$Teaser_rate_years)


borrowers<-dt[c("borrowerID","Ages","gross_income","account_opened_date","Region","Max_LTV","Loan_value","is_arrear")]

menus <- dt[c("Lenders","Fee","Max_LTV","Teaser_rate_years","initial_gross_interest_rate","account_opened_date","Region")]
menus<-unique(menus)

dtlogit<-NULL
for (bid in 1:500) {
  borrowerI<-borrowers[borrowers$borrowerID==bid,]
  date<-borrowerI$account_opened_date
  Region<-borrowerI$Region
  LTV<-borrowerI$Max_LTV
  borrowerI<-borrowerI[c("borrowerID","Ages","gross_income")]
  menuI<-menus[(menus$account_opened_date==date) &(menus$Max_LTV==LTV |menus$Max_LTV==LTV+5|menus$Max_LTV==LTV-5)&(menus$Region==Region),]
  dtlogitI<-unique(merge(menuI, borrowerI ))
  dtlogit<-rbind(dtlogit,dtlogitI)
}

dtlogit$choicevar<- paste0(dtlogit$borrowerID,"-",dtlogit$Lenders,"-", dtlogit$initial_gross_interest_rate,"-",dtlogit$Fee,"-",dtlogit$Max_LTV,"-",dtlogit$Teaser_rate_years)

dtlogit$choice<- ifelse((dtlogit$choicevar%in% dt$choicevar), 1, 0)

dtlogit$IDTBP<-paste0(dtlogit$account_opened_date,"-",dtlogit$Lenders,"-",dtlogit$Fee,"-",dtlogit$Max_LTV,"-",dtlogit$Teaser_rate_years,"-",dtlogit$Region,"-",dtlogit$initial_gross_interest_rate)
dt$IDTBP<-paste0(dt$account_opened_date,"-",dt$Lenders,"-",dt$Fee,"-",dt$Max_LTV,"-",dt$Teaser_rate_years,"-",dt$Region,"-",dt$initial_gross_interest_rate)

#########################################################################################

#########################################################################################
################### Demand Regressions
#########################################################################################

#########################################################################################




############Descriptive stats####################
### regression of LTV on borrower characteristics:


####### Generate the statistics for Table 1
SumstatLTVA<-lm(Ages~factor(Max_LTV),data=dt)
summary(SumstatLTVA)
SumstatLTVAN<-lm(Applicants_number~factor(Max_LTV),data=dt)
summary(SumstatLTVAN)
SumstatLTVGI<-lm(gross_income~factor(Max_LTV),data=dt)
summary(SumstatLTVGI)
SumstatLTVSE<-lm(selfEmployed~factor(Max_LTV),data=dt)
summary(SumstatLTVSE)

############################

####### Generate the descriptives statistics for Table 2
descriptive_stats <- dt %>% summarise(across(where(is.numeric), 
                                   list(mean = mean, std = sd, min = min, max = max), 
                                   na.rm = TRUE))
summary(descriptive_stats )
############################


####### Generate the descriptives statistics for Table 3
dt$LTV90<-0
dt$LTV90[dt$Max_LTV>=90]<-1
defaultmodelT3<-lm(is_arrear~LTV90 + Max_LTV +Teaser_rate_years+ Fee+initial_gross_interest_rate+Ages+Loan_to_income+factor(account_opened_date)+factor(Region)+factor(Lenders),data=dt)
summary(defaultmodelT3 )

defaultmodelT3<-lm(is_arrear~LTV90 + Max_LTV +Teaser_rate_years+ Fee+initial_gross_interest_rate+Ages+Loan_to_income+factor(account_opened_date)+factor(Region)+factor(Lenders),data=dt[dt$mortgageholiday==1,])
summary(defaultmodelT3 )
############################


############Estimation stats####################


dttt<- data.frame(dtlogit$borrowerID,dtlogit$IDTBP)
dtun<-unique(dttt)


####logit model

TM_s <- mlogit.data(as.data.frame(dtlogit), choice = "choice", shape = "long", chid.var = "borrowerID", alt.var = "IDTBP")


#without random coefficient
model2<-mlogit(choice ~Teaser_rate_years + factor(Lenders) +initial_gross_interest_rate +Max_LTV+ factor(Fee)
               +initial_gross_interest_rate:gross_income
               +Max_LTV:gross_income
               +Teaser_rate_years:gross_income
               | - 1, TM_s)
summary(model2)

#check results
coef_logit<-as.data.frame(t(coef(model2)))
rate_coef<-coef_logit$initial_gross_interest_rate + TM_s[TM_s$choice==1,]$gross_income*coef_logit$`initial_gross_interest_rate:gross_income`
LTV_coef<-coef_logit$Max_LTV + TM_s[TM_s$choice==1,]$gross_income*coef_logit$`Max_LTV:gross_income`
teaser_coef<-coef_logit$Teaser_rate_years + TM_s[TM_s$choice==1,]$gross_income*coef_logit$`Teaser_rate_years:gross_income`
mean(rate_coef)
summary(lm(initial_gross_interest_rate~factor(Max_LTV),data=TM_s[TM_s$choice==1,]))
WTP<-LTV_coef/rate_coef
quantile(WTP)
hist(WTP)
quantile(rate_coef)
hist(rate_coef)

#with random coefficient

#choose a random sample of borrowers (for the code to go faster in the replication package)
ID<-dtlogit[dtlogit$choice==1,]$borrowerID
sample_ID<-ID[sample(length(ID), size_subsample<-100)]
smplTM<-dtlogit[dtlogit$borrowerID %in% sample_ID,]
smplTM <- mlogit.data(as.data.frame(smplTM), choice = "choice", shape = "long", chid.var = "borrowerID", alt.var = "IDTBP")

#  R is set to 2 for the code to go faster in the replication package
modelRC<-mlogit(choice ~Teaser_rate_years + factor(Lenders) +initial_gross_interest_rate +Max_LTV+ factor(Fee)
                +initial_gross_interest_rate:gross_income
                +Max_LTV:gross_income
                +Teaser_rate_years:gross_income
                | - 1, smplTM,panel = FALSE, rpar = c(Max_LTV = "n",Teaser_rate_years= "n",initial_gross_interest_rate="n"), R = 2,
                correlation = FALSE, halton = NA, method = "bhhh")



###############Results for Table 5-6-7
summary(modelRC)



VARLTV<-(modelRC$coefficients[15])^2
VARRate<-(modelRC$coefficients[14])^2
VARTeaser<-(modelRC$coefficients[13])^2
###############

#####construct E[Beta|X]

dtpred <- predict(model2, newdata=TM_s)
dtt<-as.data.frame(t(dtpred))
dtMen<-unique(data.frame(dtlogit$Max_LTV,dtlogit$Teaser_rate_years,dtlogit$initial_gross_interest_rate,dtlogit$IDTBP))
dtMen<-dtMen[order(dtMen$dtlogit.IDTBP),]
sumLTV<-NULL
sumteaser<-NULL
sumrate<-NULL

for (i in 1:length(dt$borrowerID)) {
  sumLTV[i]<-sum(dtMen[,1] *dtt[,i])
  sumteaser[i]<-sum(dtMen[,2] *dtt[,i])
  sumrate[i]<-sum(dtMen[,3] *dtt[,i])
}
dtsum<-data.frame(sumLTV,sumteaser,sumrate)

dt$RC.LTV<-(1+ dt$Max_LTV-dtsum$sumLTV)*VARLTV
dt$RC.Teaser<-(1+ dt$Teaser_rate_years-dtsum$sumteaser)*VARTeaser
dt$RC.Rate<-(1+ dt$initial_gross_interest_rate-dtsum$sumrate)*VARRate

####Loan size model Results for Table 8

dt$logL<-log(dt$Loan_value)

modelLoan<-lm(logL~RC.LTV+RC.Rate+ RC.Teaser+  Teaser_rate_years +factor(Lenders) +initial_gross_interest_rate +Max_LTV+ factor(Fee)+gross_income+Ages+ factor(Region) ,data=dt)

####### Generate the statistics for Table 8
summary(modelLoan)

####Default model Results for Table 9

#Do the default regression

modelDefault<-lm(is_arrear~RC.LTV+RC.Rate+ RC.Teaser+  Teaser_rate_years +factor(Lenders) +initial_gross_interest_rate +Max_LTV+ factor(Fee)+gross_income+Ages+ factor(Region),data=dt)
####### Generate the statistics for Table 9
summary(modelDefault)

#########################################################################################

#########################################################################################
###################  Supply estimation
#########################################################################################

#########################################################################################


###create bank index
dt$BANKID<-NULL
i<-1
for(id in sort(unique(dt$Lenders)) ){
  dt$BANKID[dt$Lenders==id]<-i
  dtlogit$BANKID[dtlogit$Lenders==id]<-i
  i<-i+1
}

TM2<-mlogit.data(as.data.frame(dtlogit), choice = "choice", shape = "long", chid.var = "borrowerID", alt.var = "IDTBP")
df<-dt

#########################################################################################
# create the dataframes to calculate expected profits for each bank-time-product 
#########################################################################################

#list indexed by banks. contains dataframe with all borrowers prob of choosing a product
results_ep<-create_list_expected_profits_2018(TM2,df)
my.list.expected.profits<-results_ep[[1]]
mean_alpha<-results_ep[[2]]
alpha_d<-results_ep[[3]]




################################################################
# Calculate the marginal costs
###############################################################



B_i_r<-NULL
B_i_r_weight<-NULL
B_i_r[1]<- 0
B_i_r[2]<- 0
B_i_r[3]<- 0
B_i_r_weight[1]<-1
B_i_r_weight[2]<-1
B_i_r_weight[3]<-1

marginal_cost_dt<-NULL
mc_LTV<-NULL
mc_fees<-NULL
mc_teaser<-NULL
mc_firm<-NULL
mc_time<-NULL
Marginal_Costs<-list(list())
MC<-list(list())

for (t in 1:1) {#max(data$timeID)
  for (b in sort(unique(df$BANKID))) {
    list_a<-list(my.list.expected.profits[[b]],my.list.expected.profits[[b]],my.list.expected.profits[[b]])

    Matrices<-create_Matrices_2018(b,list_a,alpha_d,mean_alpha)# time is not used atm
    
    tilde_LambdaD_jt <-Matrices[[1]]
    if (!is.na(tilde_LambdaD_jt[1])) {
      tilde_LambdaD_jt <-Matrices[[1]]
      tilde_GammaD_jt<-Matrices[[2]]
      
      Lambda_jt_n <-Matrices[[3]]
      Gamma_jt_n<-Matrices[[4]]
      Q_n<-Matrices[[5]]
      
      # tilde_Q_n<-Matrices[[4]]
      R_n <-Matrices[[6]] # create matrix with columns rep r
      One_M<- Matrices[[9]]
      
      A2<-tilde_LambdaD_jt - tilde_GammaD_jt
      
      #
      
      B2<- (Lambda_jt_n - Gamma_jt_n  )  %*%  R_n   +  Q_n  %*%  One_M
      MC[[b]] <-solve (  A2 ,  B2)
      
      
      LTV <-Matrices[[8]] #return the LTVS
      FEES<-Matrices[[10]]  #check that it is right
      TEASER<-Matrices[[11]]
      #create the dataset of marginal costs
      MCC<-MC[[b]][MC[[b]] != 0]
      marginal_cost_dt<-c(marginal_cost_dt, MCC) # block diagobnal matrix
      
      mc_LTV<-c(mc_LTV,   as.vector(unlist(LTV) )  )
      mc_fees<-c(mc_fees,   as.vector(unlist(FEES) )  )
      mc_teaser<-c(mc_teaser,   as.vector(unlist(TEASER) )  )
      mc_firm<-c(mc_firm, rep(b,length(MCC))) 
      mc_time<-c(mc_time, rep(t,length(MCC)))
    }else{
      MC[[b]] <-NA
    }
  }
  Marginal_Costs[[t]]<- MC
}



############# 
#save the marginal costs in a dataframe
Data_MC<-data.frame(marginal_cost_dt,mc_LTV,mc_fees,mc_teaser,mc_firm,mc_time)



#################Analysis of Marginal costs


# do a regression to see how margina costs depends on product characteristics
Marginal_Costs_regression<- lm(marginal_cost_dt~ mc_LTV+mc_teaser+factor(mc_firm) , data = Data_MC)

##### Table 10
summary(Marginal_Costs_regression)

###############


results<-summary(Marginal_Costs_regression)

param_MC<-results[[4]][,1] #vector containing the estimated coefficients

Mrg_cst<-marginal_cost_dt
summary(Mrg_cst[Mrg_cst>=0 &Mrg_cst<=20])
hist(Mrg_cst[Mrg_cst>=0 &Mrg_cst<=20])
hist(Mrg_cst)

###########Do the decomposition of rate into marginal costs, competition, cross subsidy for Figure 5

Data_MC$round_LTV<-round(Data_MC$mc_LTV/5)*5
Data_MC$round_fees<-round(Data_MC$mc_fees/500)*500
Data_MC$product_type<-paste(Data_MC$round_LTV,Data_MC$mc_teaser,Data_MC$round_fees, sep="-")
Data_MC$bankID<-Data_MC$mc_firm

dt$round_LTV<-round(dt$Max_LTV/5)*5
dt$round_fees<-round(dt$Fee/500)*500
dt$product_type<-paste(dt$round_LTV,dt$Teaser_rate_years,dt$round_fees, sep="-")


unique_product_MC<- Data_MC %>% group_by(bankID,product_type,round_LTV,round_fees,mc_teaser)
dtdef<-dt
dtdef$predict_Default<-predict(modelDefault,newdata=dtdef)
dtdef$bankID<-dtdef$BANKID
unique_product_data<-dtdef %>% group_by(BANKID,product_type,round_LTV,round_fees,Teaser_rate_years)%>%summarise(initial_rate=mean(initial_gross_interest_rate),avg_a= 2.383892e-01+mean(gross_income*(-3.196047e-05)),survival=1-mean(predict_Default))
m.MC.DT<-merge(unique_product_MC,unique_product_data, by="product_type")
# m.MC.DT<-unique_product_MC

IDTBP<-unique(m.MC.DT$product_type)
mod.price<-lm(initial_rate~product_type + factor(BANKID),data=m.MC.DT)


mod<-lm(-avg_a~product_type + factor(BANKID),data=m.MC.DT)
mu<-lm((1/(-avg_a))~product_type+factor(BANKID),data=m.MC.DT)


m.MC.DT$MU<-m.MC.DT$initial_rate -( m.MC.DT$marginal_cost_dt)
m.MC.DT$MUdef<-m.MC.DT$initial_rate -( m.MC.DT$marginal_cost_dt/(m.MC.DT$survival))
m.MC.DT$cross_sub<-m.MC.DT$initial_rate -( m.MC.DT$marginal_cost_dt    +1/(-m.MC.DT$avg_a)  )
m.MC.DT$cross_sub2<-m.MC.DT$initial_rate -( m.MC.DT$marginal_cost_dt/(m.MC.DT$survival)    +1/(-m.MC.DT$avg_a)  )

m.MC.DT$markupmeasure<-m.MC.DT$MU/m.MC.DT$initial_rate
mean(m.MC.DT$markupmeasure)


summary(lm(MU~product_type,data=m.MC.DT))
summary(lm(MUdef~product_type,data=m.MC.DT))
summary(lm(cross_sub~product_type,data=m.MC.DT))
summary(lm(cross_sub2~product_type,data=m.MC.DT))

#########Create Figure 5

#the marginal cost
mc70<-mean(m.MC.DT$MUdef[m.MC.DT$round_LTV.x<=75&m.MC.DT$round_LTV.x>=70])
mc85<-mean(m.MC.DT$MUdef[m.MC.DT$round_LTV.x<=85&m.MC.DT$round_LTV.x>=80])
mc95<-mean(m.MC.DT$MUdef[m.MC.DT$round_LTV.x<=95&m.MC.DT$round_LTV.x>=90])

#the perfect inf MU

PI70<-mean(1/(-m.MC.DT$avg_a[m.MC.DT$round_LTV.x<=75&m.MC.DT$round_LTV.x>=70]))
PI85<-mean(1/(-m.MC.DT$avg_a[m.MC.DT$round_LTV.x<=85&m.MC.DT$round_LTV.x>=80]))
PI95<-mean(1/(-m.MC.DT$avg_a[m.MC.DT$round_LTV.x<=95&m.MC.DT$round_LTV.x>=90]))

#the cross-subsidy

CS70<-mean(m.MC.DT$cross_sub2[m.MC.DT$round_LTV.x<=75&m.MC.DT$round_LTV.x>=70])
CS85<-mean(m.MC.DT$cross_sub2[m.MC.DT$round_LTV.x<=85&m.MC.DT$round_LTV.x>=80])
CS95<-mean(m.MC.DT$cross_sub2[m.MC.DT$round_LTV.x<=95&m.MC.DT$round_LTV.x>=90])

x<-c(75,85,95,75,85,95,75,85,95)
group<-c("perfect information perfect competition rate","perfect information perfect competition rate","perfect information perfect competition rate","Perfect information imperfect competition markup","Perfect information imperfect competition markup","Perfect information imperfect competition markup","Assymetric information premium/discount","Assymetric information premium/discount","Assymetric information premium/discount")
value<-c(mc70,mc85,mc95,PI70,PI85,PI95,CS70,CS85,CS95)


datamcdecomp<- data.frame(x,group,value)

ggplot(datamcdecomp, aes(fill=group, y=value, x=x)) + 
  geom_bar(position="stack", stat="identity")+
xlab("LTV") +
  ylab("Basis points")
ggsave("InterestRateDeco.png")


#################################################################
################################################################
# Estimate the fixed costs
###############################################################
#################################################################

#################################################################
## First step: calculate the best response when one product is introduced
#################################################################

###################### standardize the menu and create potential menus full-menu
product_data<-unique(df[,c("IDTBP","Fee","Max_LTV","initial_gross_interest_rate","Teaser_rate_years","BANKID")])

product_data$round_LTV<-product_data$Max_LTV
product_data$round_fees<-product_data$Fee
product_data$product_type<-paste(product_data$round_LTV,product_data$Teaser_rate_years,product_data$round_fees, sep="-")

unique_product_data<-product_data %>% group_by(BANKID,product_type,round_LTV,round_fees,Teaser_rate_years)%>%summarise(initial_gross_interest_rate=mean(initial_gross_interest_rate))
unique_product_data$offered_products<-1

#predict the price of the new products:
price_model<-lm(initial_gross_interest_rate~ factor(BANKID)+ factor(round_LTV)+factor(round_fees)+factor(Teaser_rate_years),data=unique_product_data )
summary(price_model)

price_model2<-lm(initial_gross_interest_rate~ factor(BANKID)+ round_LTV+factor(round_fees)+factor(Teaser_rate_years),data=unique_product_data )
summary(price_model2)

#create full-menu offer:
b<-1:5
f<-c(1000,400,200)
ltv<-c(45,50,55,60,65,70,75,80,85,90,95)
tsr<-c(2,3,5,10)
complete_menu<-expand.grid(b,ltv,f,tsr)
complete_menu<-complete_menu %>% 
  rename(
    BANKID=Var1,
    round_LTV=Var2,
    round_fees = Var3,
    Teaser_rate_years=Var4
  )

complete_menu$predict_rate<-predict(price_model,newdata=complete_menu)

#merge the actual and complete menu
merged_menu<-merge(complete_menu,unique_product_data,all.x = "TRUE",by=c("BANKID","round_LTV","round_fees","Teaser_rate_years"))
merged_menu$offered_products[is.na(merged_menu$offered_products)]<-0
merged_menu$product_type<-paste(merged_menu$round_LTV,merged_menu$Teaser_rate_years,merged_menu$round_fees, sep="-")
merged_menu$initial_gross_interest_rate[is.na(merged_menu$initial_gross_interest_rate)]<-merged_menu$predict_rate[is.na(merged_menu$initial_gross_interest_rate)]

merged_menu$IDTBP<-paste0(merged_menu$account_opened_date,"-",merged_menu$BANKID,"-",merged_menu$round_fees,"-",merged_menu$round_LTV,"-",merged_menu$Teaser_rate_years,"-",merged_menu$Region,"-",merged_menu$initial_rate)

m<-merged_menu[merged_menu$offered_products==0,]
nrow(m) # number of new product that could be offered

###########################################create a new TM2 and df with a new product
source("Functions_supply_PSD_2018.R")
subdt<-as.data.frame(dtlogit)


#create a list of datasets with the new products
l<-create_list_TM_df_new_prod(TM2,merged_menu,subdt)
TM2NEW.list<-l[[1]]
dfNEW.list<-l[[2]]



#####Calculate the list of counterfactual profits for alternative menus offered

#just do it for the first 2 product for replication
s<-NULL
for (p in 1:80) {
dfnew<-dfNEW.list[[p]]
dfnew$RC.LTV<-(1+ dt$Max_LTV-dtsum$sumLTV)*VARLTV
dfnew$RC.Teaser<-(1+ dt$Teaser_rate_years-dtsum$sumteaser)*VARTeaser
dfnew$RC.Rate<-(1+ dt$initial_gross_interest_rate-dtsum$sumrate)*VARRate

TM2NEW<-TM2NEW.list[[p]]


#calculate the profits with the new product

resultslist<-create_list_expected_profits_2018(TM2NEW.list[[p]],dfnew)

my.list.expected.profits<-resultslist[[1]]
mean_alpha<-resultslist[[2]]
alpha_d<-resultslist[[3]]

#chose the bank that created a new product
b<-merged_menu[merged_menu$offered_products==0,][p,]$BANKID
list_a<-my.list.expected.profits
s[[p]]<-solve_counterfactual_2018(list_a,alpha_d,mean_alpha,TM2NEW,dfnew)
}

###############################################################
## Second step: use the logit model to recover the fixed cost
###############################################################

#profits for realized data

#initialize data
Bank<-NULL
MenuID<-NULL
Choice<-NULL
Profits<-NULL
NBProd<-NULL


for (b in 1:80) {
  ### Calculate profits with Menus in the data
  B_ID<-s[[b]][[4]]$BANKID[1]
  dtBk1<-my.list.expected.profits[[B_ID]]
  mc_LTV<-dtBk1$Max_LTV
  mc_teaser<-dtBk1$Teaser_rate_years
  mc_firm<-rep(B_ID, length(dtBk1$Teaser_rate_years))
  dtBMC<-data.frame(mc_LTV,mc_teaser,mc_firm)
  ID<-paste0(toString(B_ID),"-",toString(b))
  
  ProfitsR<-sum(dtBk1$share*dtBk1$predict_Loan*((1-dtBk1$predict_Default)*dtBk1$initial_gross_interest_rate-predict(Marginal_Costs_regression,dtBMC)))
  
  #Calculate profits with counterfactual
  
  B_ID<-s[[b]][[4]]$BANKID[1]
  list.P<-s[[b]][[2]]
  dtBk1<-list.P[[B_ID]]
  mc_LTV<-dtBk1$Max_LTV
  mc_teaser<-dtBk1$Teaser_rate_years
  mc_firm<-rep(B_ID, length(dtBk1$Teaser_rate_years))
  dtBMC<-data.frame(mc_LTV,mc_teaser,mc_firm)
  
  ProfitsC<-sum(dtBk1$share*dtBk1$predict_Loan*((1-dtBk1$predict_Default)*dtBk1$initial_gross_interest_rate-predict(Marginal_Costs_regression,dtBMC)))
  
  #Set up the logit to estimate the fixed cost
  
  
  ####logit model
  
  #create data
  Bank<-c(Bank,ID,ID)
  MenuID<-c(MenuID,1,2)
  Choice<-c(Choice,1,0)
  Profits<-c(Profits,ProfitsR,ProfitsC)
  
  if (b==1) {
    NBProd<-c(NBProd,0,1)
  }else{
    NBProd<-c(NBProd,0,1)
  }
}


dt.profit<-data.frame(Bank, MenuID, Choice,Profits,NBProd)
dt.profit$Profits[is.na(dt.profit$Profits)]<-0
dt.profitL <- mlogit.data(as.data.frame(dt.profit), choice = "Choice", shape = "long", chid.var = "Bank", alt.var = "MenuID")

#logit cost
modelFC<-mlogit(Choice ~Profits+NBProd
               | - 1, dt.profitL)

##### Table 11
summary(modelFC)




